// This proto describes the format of the output profile file from
// the TF-stats tool.
syntax = "proto3";

package tensorflow.profiler;

// Top-level container of the TF-stats output: one TfStatsTable that
// includes IDLE time, one that excludes it, and the device type.
message TfStatsDatabase {
  // Field numbers 1 through 3 are reserved and must not be reused.
  reserved 1 to 3;

  // Table that includes IDLE time.
  TfStatsTable with_idle = 4;

  // Table that excludes IDLE time.
  TfStatsTable without_idle = 5;

  // Type of the device that was used.
  string device_type = 6;
}

// A table of TfStatsRecords plus the keys of the corresponding pprof
// profiles (one key for host TF operations, one for device TF operations).
message TfStatsTable {
  // All TfStats records, one for each TF operation.
  repeated TfStatsRecord tf_stats_record = 1;
  // Key to the pprof profile for host TF operations.
  string host_tf_pprof_key = 2;
  // Key to the pprof profile for device TF operations.
  string device_tf_pprof_key = 3;
}

// Statistics for a single profiled TF operation. There is one
// TfStatsRecord for each TF operation profiled.
//
// All fields are plain proto3 scalars without the `optional` label, so a
// zero / empty / false value cannot be distinguished from an unset field.
message TfStatsRecord {
  // Rank of this TF-op among all TF-ops.
  // NOTE(review): the ordering criterion and whether ranks start at 0 or 1
  // are not stated in this file — confirm with the producer.
  uint64 rank = 1;
  // Whether this TF-op is on "Host" or "Device".
  string host_or_device = 2;
  // TF-op type.
  string op_type = 3;
  // TF-op name.
  string op_name = 4;
  // Number of occurrences of the operation.
  int64 occurrences = 5;
  // Total "accumulated" time in microseconds that the operation
  // took. If this operation has any child operations,
  // the "accumulated" time includes the time spent inside the children.
  double total_time_in_us = 6;
  // Average "accumulated" time in microseconds that each
  // occurrence of the operation took.
  double avg_time_in_us = 7;
  // Total "self" time in microseconds that the operation took.
  // If this operation has any child operations, the "self" time
  // doesn't include the time spent inside the children.
  double total_self_time_in_us = 8;
  // Average "self" time in microseconds that the operation took
  // (presumably per occurrence, mirroring avg_time_in_us — TODO confirm).
  double avg_self_time_in_us = 9;
  // Total "self" time as a fraction of the sum of the total self-time
  // of operations run on the device. It is 0 if this op runs on the host.
  double device_total_self_time_as_fraction = 10;
  // Cumulative value of device_total_self_time_as_fraction.
  // NOTE(review): presumably accumulated over records in rank order —
  // confirm with the producer.
  double device_cumulative_total_self_time_as_fraction = 11;
  // Total "self" time as a fraction of the sum of the total self-time
  // of operations run on the host. It is 0 if this op runs on the device.
  double host_total_self_time_as_fraction = 12;
  // Cumulative value of host_total_self_time_as_fraction.
  // NOTE(review): presumably accumulated over records in rank order —
  // confirm with the producer.
  double host_cumulative_total_self_time_as_fraction = 13;
  // Number of floating-point operations (FLOPs) performed per
  // second.
  // NOTE(review): the scale (e.g. FLOP/s vs. GFLOP/s) is not stated in
  // this file — confirm with the producer.
  double measured_flop_rate = 14;
  // Number of bytes (including both read and write) accessed per
  // second.
  // NOTE(review): the scale (e.g. bytes/s vs. GiB/s) is not stated in
  // this file — confirm with the producer.
  double measured_memory_bw = 15;
  // Operational intensity, which is defined as FLOPs/bytes-accessed.
  double operational_intensity = 16;
  // Whether this operation is "Compute" or "Memory" bound,
  // according to the Roofline Model.
  string bound_by = 17;
  // Whether this TF-op is eagerly executed.
  bool is_eager = 18;
  // Fraction of kernel time that utilizes GPU TensorCore.
  // It is 0.0 if this op does not run on a GPU device.
  double gpu_tensorcore_utilization = 19;
}
